library(brms)
library(data.table)

# SET WORKING DIRECTORY
setwd("/mnt/c/Users/drmil/Dropbox/White House Lobbying/3YP/APSR Final/Replication Archive")

# READ IN DATA FOR ANALYSIS
clinton_logs_analysis <- fread("clinton_logs_analysis.csv", header = TRUE, stringsAsFactors = FALSE)

model_formula <- brmsformula(mvbind(any_visits_high_leq0,
                                    any_visits_low_leq0) ~ 
                               any_visits_high_leq0_1l +
                               any_visits_low_leq0_1l + 
                               log(lobbyexp_1l+1) + made_donations_l1tol4 + 
                               made_donations_l1tol4:log(amt_donations_1ltol4+1) +
                               industry_pid + inhouse_lobby +
                               ACC + ADV + AER + AGR + ALC + ANI + APP + ART + AUT + 
                               AVI + BAN + BEV + BNK + BUD + CAW + CDT + CHM + CIV + 
                               COM + CON + CPI + CPT + CSP + DEF + DIS + DOC + ECN + 
                               EDU + ENG + ENV + FAM + FIN + FIR + FOO + FOR + FUE + 
                               GAM + GOV + HCR + HOU + IMM + IND + INS + LAW + LBR + 
                               MAN + MAR + MED + MIA + MMM + MON + NAT + PHA + POS + 
                               REL + RES + RET + ROD + RRR + SCI + SMB + SPO + TAX + 
                               TEC + TOB + TOR + TOU + TRA + TRD + TRU + UNM + URB + 
                               UTI + VET + WAS + WEL +
                               (1|q|yearperiod) + 
                               (1|p|crp_orgname:crp_industry) + 
                               (1|r|crp_industry))

# USE MODEL FORMULA TO CREATE DATA FRAME TO ASSIGN PRIORS
priors_df <-get_prior(model_formula,
                      data = clinton_logs_analysis, 
                      family=bernoulli(link = "logit"))

# ASSIGN DIFFUSE PRIORS FOR BETA COEFFICIENTS, VARYING INTERCEPTS FOR EACH
# GROUPING, AND CORRELATION AMONG EACH GROUPING OF VARYING INTERCEPTS
priors_df$prior <- ifelse(priors_df$class=="b", "normal(0, 10)", priors_df$prior)
priors_df$prior <- ifelse(priors_df$class=="cor", "lkj(1)", priors_df$prior)
priors_df$prior <- ifelse(priors_df$class=="sd"|priors_df$class=="Intercept", 
                          "student_t(3, 0, 10)", priors_df$prior)

# FITTING MODEL--NOTE THAT MODEL USES BOTH WITHIN- AND ACROSS-CHAIN 
# PARALLELIZATION
tableSI11_cols1and2 <- brm(model_formula,
                     data= clinton_logs_analysis, 
                     family=bernoulli(link = "logit"), 
                     prior = priors_df,
                     refresh = 10, 
                     control=list(adapt_delta=0.999999999999999,
                                  max_treedepth = 20),
                     threads = threading(threads = 3, static = TRUE), 
                     backend = "cmdstanr",
                     chains = 4, 
                     cores = 4, 
                     iter = 5000, 
                     warmup = 4000,
                     seed = 1989)

summary(tableSI11_cols1and2)

# model diagnostics
# any divergent transitions?
sum(nuts_params(tableSI11_cols1and2, pars = "divergent__")$Value)
# any rhats above 1.1?
sort(rhat(tableSI11_cols1and2), decreasing = TRUE)[1:10]
# neff ratio sufficiently large (>0.001 for all parameters?)
sort(neff_ratio(tableSI11_cols1and2), decreasing = FALSE)[1:10]
save(tableSI11_cols1and2, file="tableSI11_cols1and2.RData")